#load (install if required) packages
pacman::p_load(dplyr, tidyr, purrr, sf, here, glue, stringr, tmap, ggplot2)
#update package
#pak::pak("add-am/RcTools")
#load the package
library(RcTools)
#turn off spherical geometry (planar operations are faster and avoid s2 edge cases)
sf_use_s2(FALSE)
#turn off scientific notation
options(scipen = 999)
#define data source path
data_path <- here("data/")
#define output path
output_path <- here("outputs/")
#create some folders for saving plots and maps
#showWarnings = FALSE silences the warning emitted on repeat runs when the
#folders already exist; recursive = TRUE also creates outputs/ if it is missing
dir.create(glue("{output_path}/maps/"), showWarnings = FALSE, recursive = TRUE)
dir.create(glue("{output_path}/plots/"), showWarnings = FALSE, recursive = TRUE)

Introduction
This script contains the methods used to wrangle, analyse and present freshwater wetland data in the Northern Three regions. Freshwater wetland data is currently used within the freshwater habitat and hydrology section of the technical report. The amount of freshwater wetlands lost/gained is the metric assessed. Therefore the main objectives of this script are to:
- Define vegetation that is classified as “freshwater wetlands”
- Select only this data
- Calculate the total amount of freshwater wetland coverage for each dataset.
- Create a tabular summary of the coverage
- Create maps of the coverage
- Create plots of the coverage and coverage change.
Script Set Up
Packages and environment options are defined below.
Load Data
Three datasets are required:
- Spatial data specific to the N3 region - such as the region, basin, and sub basin boundaries.
- A Qld border to use as a background to maps.
- Wetlands data.
The N3 data is lightweight and is loaded below.
#load the n3 specific data, project to GDA2020 / MGA zone 55, and repair invalid geometries
n3_region <- st_read(glue("{data_path}/n3_region.gpkg")) |>
  st_transform("EPSG:7855") |>
  st_make_valid()
#streamline the regions dataset: drop marine areas and dissolve down to one
#polygon per region / basin / sub basin combination
n3_wl_region <- n3_region |>
  filter(Environment != "Marine") |>
  group_by(Region, BasinOrZone, SubBasinOrSubZone) |>
  summarise(geom = st_union(geom))

The Qld border is pulled from the RcTools package

#load the qld border dataset shipped with RcTools
data("qld")

The raw wetland files are very large and can't all be held in memory at the same time. Instead, these are done in stages, cropped, and resaved, and then a smaller version can be loaded. If the smaller versions already exist, the crop is skipped.
#list layers in dataset
wetlands_01_19_layers <- st_layers(glue("{data_path}/raw/wetland_areas_2001-2019.gdb"))
#filter for layers with "area" (layer names contain e.g. "AREAS_2001")
targets <- str_detect(wetlands_01_19_layers[[1]], "AREAS_2")
#extract those layers
wetlands_01_19_layers <- wetlands_01_19_layers[[1]][targets]
#convert to a list
wetlands_01_19_layers <- as.list(wetlands_01_19_layers)
#include the path to the 2021 data
all_wetland_layers <- c(wetlands_01_19_layers, glue("{data_path}/raw/wetland_areas_2021.gpkg"))
#create a list of the cropped files
cropped_wl_files <- list.files(glue("{data_path}/processed/"), pattern = "cropped")
#if the list of cropped files is shorter than the list of full files, proceed with the crop and clean
if (length(all_wetland_layers) > length(cropped_wl_files)) {
  #walk (not map) over each of the targets - this loop is run purely for its side effects
  walk(all_wetland_layers, \(x) {
    #determine the year based on the name (first run of 4 digits, e.g. "2019")
    target_year <- str_extract(x, "\\d{4}")
    #if the target has the gpkg extension, nothing needs to be done for conversions
    #note fixed() so the "." is matched literally rather than as a regex wildcard
    if (str_detect(x, fixed(".gpkg"))) {
      target_layer <- st_read(x)
    } else {
      #otherwise, use the gdb method and convert
      target_layer <- st_read(glue("{data_path}/raw/wetland_areas_2001-2019.gdb"), layer = x)
      #update the crs of the data
      target_layer <- st_transform(target_layer, "EPSG:7844")
      #create temporary files for the geometry-type conversion round trip
      tmp1 <- tempfile(fileext = ".gpkg")
      tmp2 <- tempfile(fileext = ".gpkg")
      #write the original file to the first temporary location
      sf::st_write(target_layer, tmp1, quiet = TRUE)
      #use gdal to convert the geometry types from temp file 1 to temp file 2
      gdalUtilities::ogr2ogr(src = tmp1, dst = tmp2, f = "GPKG", nlt = "MULTIPOLYGON")
      #load the second temporary file back in
      target_reload <- st_read(tmp2, quiet = TRUE)
      #reattach original attributes
      target_layer <- st_sf(
        st_drop_geometry(target_layer),
        geom = st_geometry(target_reload)
      )
      #clean up the temporary files now that the geometry has been reattached
      unlink(c(tmp1, tmp2))
    }
    #update crs
    target_layer <- st_transform(target_layer, "EPSG:7855")
    #intersect with the N3 boundaries, and include a year column
    cropped_target_layer <- target_layer |>
      st_intersection(n3_wl_region) |>
      mutate(Year = target_year)
    #save the file (delete_dsn = TRUE overwrites any stale copy)
    st_write(cropped_target_layer, glue("{data_path}/processed/wetlands_{target_year}_cropped.gpkg"), delete_dsn = TRUE)
  })
}
#once everything is done the environment should be cleaned to save on memory usage
rm(list = setdiff(ls(), c("n3_region", "n3_wl_region", "data_path", "output_path")))

Once all cropping is done, the smaller files can all be loaded in together, they will each then undergo further cleaning and filtering, specifically the steps that occur are as follows:
- Select:
- Only palustrine (fresh) wetlands: (WETCLASS_ == “Palustrine”),
- That cover >=80% of their polygon (i.e. they are the dominant wetland type),
- And are completely natural: (HYDROMOD == “H1”).
- Merge (union) all polygons belonging to the same group to improve the look of maps. Takes a long time.
- Calculate the total area covered by each of the groups of wetlands. Takes a long time.
#create a list of the cropped files, this code is run again in case the files didn't exist the first time
cropped_wl_paths <- list.files(glue("{data_path}/processed/"), pattern = "cropped", full.names = TRUE)
#create a list of the cropped and filtered files
crop_filt_wl_paths <- list.files(glue("{data_path}/processed/"), pattern = "final")
#if the number of final files is less than the number of cropped files, proceed with the additional filtering
if (length(cropped_wl_paths) > length(crop_filt_wl_paths)) {
  #load all files
  cropped_wl_files <- map(cropped_wl_paths, st_read)
  #for each item in the list
  crop_filt_wl_files <- map(cropped_wl_files, \(x) {
    #create a look up table to optionally rename (some years ship lower case column names)
    lookup <- c("ECO_WSYS_H" = "eco_wsys_h", "HYD_MOD_H" = "hyd_mod_h", "OTH_WPCT_H" = "oth_wpct_h")
    #filter for specific groups and clean up columns:
    #palustrine (PAL), completely natural (H1), dominant wetland type (DOM)
    x <- x |>
      rename(any_of(lookup)) |>
      filter(ECO_WSYS_H == "PAL", HYD_MOD_H == "H1", OTH_WPCT_H == "DOM") |>
      rename(Vegetation = ECO_WSYS_H)
    #union per group, calculate area, take only polygons, and rename variables
    x <- x |>
      group_by(Region, BasinOrZone, SubBasinOrSubZone, Year, Vegetation) |>
      summarise(geom = st_union(geom)) |>
      mutate(AreaM = st_area(geom)) |>
      st_collection_extract(type = "POLYGON", warn = FALSE) |>
      mutate(Vegetation = "Wetlands")
    #update units to be km2
    x$AreaKm <- units::set_units(x$AreaM, km^2)
    #drop the meters version and return the cleaned layer
    x |> select(!AreaM)
  })
  #save each file; delete_dsn = TRUE so stale copies from partial earlier runs
  #are overwritten (consistent with how the cropped files are written above)
  crop_filt_wl_paths <- str_replace(cropped_wl_paths, "cropped", "final")
  walk2(crop_filt_wl_files, crop_filt_wl_paths, \(x, y) st_write(x, y, delete_dsn = TRUE))
}

With all processing done, files can then be read in for analysis.
#create a list of the cropped and filtered files (full paths this time, for reading)
crop_filt_wl_paths <- list.files(glue("{data_path}/processed/"), pattern = "final", full.names = TRUE)
#load all files in and then combine into a single dataset
all_wetlands <- bind_rows(map(crop_filt_wl_paths, st_read))

Analyse Data
There are two key statistics to calculate:
- Total Area
- Change in Area over time
::: {.cell}
#update units (units are missing on repeat runs of the same script)
all_wetlands$AreaKm <- units::set_units(all_wetlands$AreaKm, km^2)
#drop geometry (improves processing speed), then ensure each sub basin only has one area value
all_wl_area_change <- all_wetlands |>
  st_drop_geometry() |>
  group_by(Region, BasinOrZone, SubBasinOrSubZone, Year) |>
  summarise(AreaKm = sum(AreaKm))
#create a 0 value with units for replacements
replace_value <- units::set_units(0, km^2)
#create rows for the Burdekin, Black and Ross overall (currently divided by sub basin)
basin_agg <- all_wl_area_change |>
  filter(BasinOrZone != SubBasinOrSubZone) |>
  group_by(Region, BasinOrZone, Year) |>
  summarise(AreaKm = sum(AreaKm)) |>
  mutate(SubBasinOrSubZone = BasinOrZone)
#join the new basin rows back to the main dataset
all_wl_area_change <- bind_rows(all_wl_area_change, basin_agg)
#perform the area comparison
all_wl_area_change <- all_wl_area_change |>
  arrange(SubBasinOrSubZone, Year) |> #order by sub basin and year
  group_by(SubBasinOrSubZone) |> #group by sub basin
  mutate(AreaChange = AreaKm - lag(AreaKm), #compare the area for the row to the one above using lag()
         PercentChange = (AreaChange / lag(AreaKm)) * 100) |>
  #the first year in each sub basin has no predecessor so lag() yields NA;
  #replace those NAs with (unit aware) zeros, then round everything
  mutate(across(matches("Percent"), \(x) as.vector(x)),
         across(matches("Percent"), \(x) replace_na(x, 0)),
         across(matches("Area"), \(x) replace_na(x, replace_value)),
         across(where(is.numeric), \(x) round(x, 5))) |>
  ungroup()
:::
Standardised Scores and Grades
Following this, the year on year changes can then be converted to standardised scores.
#remove units from column so the plain-numeric scoring function can work with it
all_wl_area_change <- all_wl_area_change |>
  mutate(AreaChange = units::drop_units(AreaChange))
#apply the custom value_to_score function from the RcTools package
wl_scored <- all_wl_area_change |>
  value_to_score(
    value = AreaChange,
    value_type = "Wetlands"
  )
#clean and organise the table (1 decimal place for reporting)
wl_scored <- wl_scored |>
  mutate(
    AreaKm = round(AreaKm, 1),
    AreaChange = round(AreaChange, 1),
    PercentChange = round(PercentChange, 1),
    AreaChangeScore = round(AreaChangeScore, 1)
  ) |>
  arrange(Region, BasinOrZone, SubBasinOrSubZone)

Subsequently, the grades can be calculated.

#grade using the function from RcTools
wl_graded <- score_to_grade(wl_scored, AreaChangeScore)

And finally, the full table can be saved, as well as a simplified version for publication in the technical report.
#save using the function from RcTools
save_n3_table(
  wl_graded,
  glue("{output_path}/long_data"),
  target_columns = 8:9,
  target_rows = 1:nrow(wl_graded),
  scheme = "Report Card",
  include_letter = TRUE
)
#create a tech report version (essentially just a wide version, one column set per year)
pub_ready <- wl_graded |>
  pivot_wider(names_from = Year, values_from = c(AreaKm, AreaChange, PercentChange, AreaChangeScore, AreaChangeGrade), names_sep = "")
#save using the function from RcTools
save_n3_table(
  pub_ready,
  glue("{output_path}/wide_data"),
  target_columns = 25:ncol(pub_ready),
  target_rows = 1:nrow(pub_ready),
  scheme = "Report Card",
  include_letter = TRUE
)

Visualise Data
Wetland areas can then be visualised in a series of maps and plots.
Maps
Maps are created using the following loop; emphasis is provided via a buffered red line around the wetland areas.
#create a column to hold palette
all_wetlands$Palette <- "#3C6ABE"
#create a buffered version to help identify wetland locations (400 m halo)
wl_buffered <- all_wetlands |>
  st_buffer(dist = 400)
#note we are just going to target the most recent year of data;
#hoisted out of the loop so it is computed once rather than twice per basin
latest_year <- max(all_wetlands$Year)
for (i in unique(all_wetlands$BasinOrZone)) { #for each basin
  #select wetlands in basin
  wl_target_basin <- all_wetlands |> filter(BasinOrZone == i, Year == latest_year)
  #select the buffered wetlands in basin
  wl_target_buffered <- wl_buffered |> filter(BasinOrZone == i, Year == latest_year)
  #get the basin outline from the n3 region dataset
  basin_outline <- n3_wl_region |> filter(BasinOrZone == i)
  #get the region outline from the n3 region dataset
  region_outline <- n3_wl_region |> filter(Region == unique(basin_outline$Region))
  #create the water layer
  water_layer <- maps_water_layer(
    Basin = i,
    WaterLines = TRUE,
    WaterAreas = TRUE,
    WaterLakes = TRUE,
    StreamOrder = 3
  )
  #create the inset components
  inset_components <- maps_inset_layer(basin_outline, region_outline, 1.1)
  #build the map: qld backdrop, basin outline, wetlands, red emphasis halo, water
  wl_map <-
    tm_shape(qld) +
    tm_polygons(fill = "grey80", col = "black") +
    tm_shape(basin_outline, is.main = TRUE) +
    tm_polygons(fill = "grey90", col = "black") +
    tm_shape(wl_target_basin) +
    tm_polygons(
      fill = "Palette",
      fill.legend = tm_legend(title = "Wetlands"),
      col = NULL,
      fill.scale = tm_scale_categorical(
        values = sort(unique(wl_target_basin$Palette)),
        labels = "Wetlands"
      )
    ) +
    tm_shape(wl_target_buffered) +
    tm_lines(col = "red") +
    water_layer +
    tm_layout(
      legend.frame = TRUE,
      legend.bg.color = "White",
      legend.text.size = 0.7,
      legend.position = c("left", "bottom"),
      asp = 1.1
    )
  #edit names (lower case for the file name)
  i <- str_to_lower(i)
  #save the map as a png
  tmap_save(
    wl_map,
    filename = glue("{output_path}/maps/{i}_freshwater-wetlands_map.png"),
    insets_tm = inset_components[[1]],
    insets_vp = inset_components[[2]])
}

Plots
Plots are created that focus on vegetation percentage change year on year.
#create custom palette
my_palette <- "#3C6ABE"
#assign names to my palette so ggplot can colour everything correctly
names(my_palette) <- "Wetlands"
#drop sub basins - the plots only show whole-basin results
wl_graded <- wl_graded |>
  filter(BasinOrZone == SubBasinOrSubZone) |>
  mutate(Vegetation = "Wetlands")
#list out basins
basins <- unique(wl_graded$BasinOrZone)
for (i in basins) {
  #select the target basin
  target_data <- wl_graded |> filter(BasinOrZone == i)
  #change year to a factor to get the axis order right
  target_data <- target_data |>
    mutate(Year = factor(Year, levels = unique(target_data$Year)))
  #plot; the pseudo-log x axis stops large percent swings drowning out small ones
  wl_plot <- ggplot(target_data, aes(PercentChange, Year, fill = Vegetation)) +
    geom_col(position = "dodge") + #geom_col() is the idiomatic form of geom_bar(stat = "identity")
    scale_x_continuous(trans = scales::pseudo_log_trans(base = 10)) +
    scale_fill_manual(values = my_palette) +
    theme(axis.text.x = element_text(colour = "black"),
          axis.line.x = element_line(colour = "black"),
          axis.text.y = element_text(colour = "black"),
          axis.line.y = element_line(colour = "black"),
          axis.ticks.y = element_blank(),
          panel.background = element_blank()) +
    geom_vline(xintercept = 0) +
    labs(x = "Change (%)", y = "Year", fill = glue("{i} Basin"))
  #edit names (lower case for the file name)
  i <- str_to_lower(i)
  #save - file name now says "plot" (previously "_map.png", a copy-and-paste slip)
  ggsave(glue("{output_path}/plots/{i}_freshwater-wetlands_plot.png"), wl_plot)
}